My ML Project

Authors
Affiliation

Name I, First Name I

Name of the University

Name II, First Name II

Published

May 13, 2024

Abstract

The following machine learning project focuses on…

Warning: package 'ggplot2' was built under R version 4.3.1
Warning: package 'dplyr' was built under R version 4.3.1
Warning: package 'patchwork' was built under R version 4.3.1
Warning: package 'maps' was built under R version 4.3.1
Warning: package 'scales' was built under R version 4.3.1
Warning: package 'ggmap' was built under R version 4.3.1
Warning: package 'knitr' was built under R version 4.3.1
Warning: package 'rmarkdown' was built under R version 4.3.1
Warning: package 'lattice' was built under R version 4.3.1
Warning: package 'plotly' was built under R version 4.3.1
Warning: package 'leaflet' was built under R version 4.3.1

1 Introduction

  • Overview and Motivation
  • Related Work
  • Research questions

1.1 Setup

Code
print('hello')
#> [1] "hello"

1.1.1 Libraries

Code
# Python code
import numpy as np
print(np.mean([10, 20, 30, 40, 50]))
#> 30.0

2 Data

  • Sources
  • Description
  • Wrangling/cleaning
  • Spotting mistakes and missing data (could be part of EDA too)
  • Listing anomalies and outliers (could be part of EDA too)

2.1 Wrangling and Cleaning

  • ajouter source
  • ajouter description
  • expliquer blabla
  • Explain why we remove NA from m2 column.
  • Explain …

2.1.1 Raw dataset

Code
properties <- read.csv(file.path(here(),"data/properties.csv"))
# show 1000 first rows of properties using reactable
reactable(head(properties, 1000))
Code

# Create a tibble with cantons and observations
observations_table <- tibble(
  Canton = c("Vaud", "Bern", "Lucerne", "Zurich", "Uri", "Schwyz",
             "Obwalden", "Nidwalden", "Glarus", "St. Gallen", "Grisons", 
             "Aargau", "Thurgau", "Ticino", "Valais", "Neuchatel", 
             "Geneva", "Jura", "Zug", "Fribourg", "Solothurn", 
             "Basel-Stadt", "Basel-Landschaft", "Schaffhausen", 
             "Appenzell-Ausser-Rhoden", "Appenzell-Inner-Rhoden", "Total"),
  Observations = c(3232, 1553, 376, 1191, 71, 93, 29, 51, 55, 757, 405,
                   1481, 553, 4230, 3601, 513, 629, 329, 69, 1242, 590, 
                   149, 705, 118, 102, 12, sum(c(3232, 1553, 376, 1191, 71, 93, 29, 51, 55, 757, 405,
                                               1481, 553, 4230, 3601, 513, 629, 329, 69, 1242, 590, 
                                               149, 705, 118, 102, 12)))
)

# Display the table using kable and kableExtra
observations_table %>%
  kbl(caption = "Number of Observations by Canton") %>%
  kable_styling(position = "center", bootstrap_options = c("striped", "bordered", "hover")) %>%
  add_header_above(c(" " = 1, "Observations" = 1)) # Adds headers spanning columns
Number of Observations by Canton
Observations
Canton Observations
Vaud 3232
Bern 1553
Lucerne 376
Zurich 1191
Uri 71
Schwyz 93
Obwalden 29
Nidwalden 51
Glarus 55
St. Gallen 757
Grisons 405
Aargau 1481
Thurgau 553
Ticino 4230
Valais 3601
Neuchatel 513
Geneva 629
Jura 329
Zug 69
Fribourg 1242
Solothurn 590
Basel-Stadt 149
Basel-Landschaft 705
Schaffhausen 118
Appenzell-Ausser-Rhoden 102
Appenzell-Inner-Rhoden 12
Total 22136

2.1.2 Cleaning

Code
# Identify values causing the issue
problematic_values <- properties$number_of_rooms[is.na(as.numeric(properties$number_of_rooms))]
#> Warning: NAs introduced by coercion
# Replace non-numeric values with NA
#properties$number_of_rooms <- as.numeric(gsub("[^0-9.]", "", properties$number_of_rooms))

# Remove non-numeric characters and convert to numeric
properties$price <- as.numeric(gsub("[^0-9]", "", properties$price))

# Subset the dataset to exclude rows with price < 20000
properties <- properties[properties$price >= 20000, ]

# Subset the dataset to exclude rows with numbers of rooms < 25
#properties <- properties[properties$number_of_rooms <25, ]

# Replace incomplete addresses
properties$address <- gsub("^\\W*[.,0-]\\W*", "", properties$address)

properties_filtered <- na.omit(properties)

properties_filtered$year_category <- substr(properties_filtered$year_category, 1, 9)
# Assuming 'year_category' is a column in the 'properties' dataset
properties_filtered$year_category <- as.factor(properties_filtered$year_category)


# remove m^2 from column 'square_meters'
properties_filtered$square_meters <- as.numeric(gsub("\\D", "", properties_filtered$square_meters))
# print how many NA observations left in square_meters
print(sum(is.na(properties_filtered$square_meters)))
#> [1] 1056
# remove NA
properties_filtered <- properties_filtered[!is.na(properties_filtered$square_meters),]
# add majuscule to canton
properties_filtered$canton <- tools::toTitleCase(properties_filtered$canton)

# # Preprocess the number_of_rooms column
properties_filtered$number_of_rooms <- gsub(" rooms", "", properties_filtered$number_of_rooms)
properties_filtered$number_of_rooms <- gsub(" room", "", properties_filtered$number_of_rooms)
# Remove rows with "m²" in the "number_of_rooms" column
properties_filtered <- properties_filtered[!grepl("m²", properties_filtered$number_of_rooms), ]
properties_filtered$number_of_rooms <- as.numeric(properties_filtered$number_of_rooms)
# Remove rows with rooms >= 27
properties_filtered <- properties_filtered[properties_filtered$number_of_rooms <= 27, , drop = FALSE]


#test <- properties_filtered


# properties_filtered$number_of_rooms <- as.character(properties_filtered$number_of_rooms)
# properties_filtered$number_of_rooms <- gsub("\\D", properties_filtered$number_of_rooms)  # Remove non-numeric characters
# properties_filtered$number_of_rooms <- as.numeric(properties_filtered$number_of_rooms)       # Convert to numeric
# properties_filtered$number_of_rooms <- trunc(properties_filtered$number_of_rooms)             # Truncate non-integer values

# show 100 first row of cleaned dataset using reactable
reactable(head(properties_filtered, 100))
Code

#filter properties_filtered to contains only 'price', 'number_of_rooms', 'square_meters'
properties_summary <- properties_filtered[, c('price', 'number_of_rooms', 'square_meters')]
#summary statistics
summary(properties_summary)
#>      price          number_of_rooms square_meters 
#>  Min.   :   25000   Min.   : 1.00   Min.   :   1  
#>  1st Qu.:  690000   1st Qu.: 3.50   1st Qu.:  99  
#>  Median :  992340   Median : 4.50   Median : 136  
#>  Mean   : 1350782   Mean   : 4.98   Mean   : 159  
#>  3rd Qu.: 1550000   3rd Qu.: 5.50   3rd Qu.: 190  
#>  Max.   :26149500   Max.   :27.00   Max.   :2000
# Data
data <- data.frame(
  price = c(25000, 690000, 992340, 1348429, 1550000, 26149500),
  number_of_rooms = c(1.0, 35.0, 45.0, 41.1, 55.0, 185.0),
  square_meters = c(1, 98, 136, 159, 190, 2000)
)

# Summary statistics
summary_stats <- summary(data)

# Summary statistics
summary_stats <- cbind(
  Min = apply(data, 2, min),
  Q1 = apply(data, 2, quantile, probs = 0.25),
  Median = apply(data, 2, median),
  Mean = apply(data, 2, mean),
  Q3 = apply(data, 2, quantile, probs = 0.75),
  Max = apply(data, 2, max)
)

# Create table
table <- kbl(summary_stats, align = rep('c', 6), caption = "Summary statistics for the dataset") %>%
  kable_styling(position = "center", bootstrap_options = c("striped", "hover", "condensed", "responsive"))
table
Summary statistics for the dataset
Min Q1 Median Mean Q3 Max
price 25000 765585.0 1170384 5.13e+06 1.50e+06 26149500
number_of_rooms 1 36.5 43 6.04e+01 5.25e+01 185
square_meters 1 107.5 148 4.31e+02 1.82e+02 2000

2.1.3 AMTOVZ_CSV_LV95 Data

The dataset described is the “Official Index of Localities” (Répertoire officiel des localités) provided by the Swiss Federal Office of Topography (swisstopo). It contains comprehensive information about all localities in Switzerland and the Principality of Liechtenstein, including their names, postal codes, and perimeters.

This dataset is regularly updated on a monthly basis, incorporating changes reported by cantonal authorities and Swiss Post. It serves various purposes, including spatial analysis, integration with other geographic datasets, usage as a geolocated background in GIS (Geographic Information Systems) and CAD (Computer-Aided Design) systems, statistical analysis, and as a reference dataset for information systems.

Updates and release notes for the dataset are provided periodically, detailing changes and improvements made over time. The Swiss Federal Office of Topography manages and distributes this dataset as part of its responsibilities in collecting and providing official geospatial data for Switzerland.

Source - swisstopo

2.1.3.1 Creating Variable zip_code and merging with AMTOVZ_CSV_LV95

Code
df <- properties_filtered
#the address column is like : '1844 Villeneuve VD' and has zip code number in it
#taking out the zip code number and creating a new column 'zip_code'
#the way to identify the zip code is to identify numbers that are 4 digits long
df$zip_code <- as.numeric(gsub("\\D", "", df$address))
#removing the first two number of zip code has more than 4 number
df$zip_code <- ifelse(df$zip_code > 9999, df$zip_code %% 10000, df$zip_code)

2.1.3.2 Using AMTOVZ_CSV_LV95 to get the city and canton from the zip code

Code
#read .csv AMTOVZ_CSV_LV95
amto <- read.csv(file.path(here(),"data/AMTOVZ_CSV_WGS84.csv"), sep = ";")
#creating a new dataframe with 'Ortschaftsname' as 'City'Place_name', 'PLZ' as 'zip_code', 'KantonskÃ.rzel' as 'Canton_code', 'E' as 'lon' and 'N' as 'lat'
amto_df <- amto[, c('Gemeindename', 'PLZ', 'Kantonskürzel', 'E', 'N')]
#renaming the columns
colnames(amto_df) <- c('Community', 'zip_code', 'Canton_code', 'lon', 'lat')

#remove duplicates of zip code
amto_df <- amto_df[!duplicated(amto_df$zip_code),]

#add the variable of amto_df to the df if the zip code matches
df <- merge(df, amto_df, by = "zip_code", all.x = TRUE)

#check if there are nan in city
df[is.na(df$Community),]
#>       zip_code    price number_of_rooms square_meters
#> 1           25  2200000            10.0           263
#> 2           25  2200000             6.5           165
#> 3           26   655000             3.5            66
#> 4           26  1995000             7.5           180
#> 5          322   870000             2.5            59
#> 6          322   880000             2.5            55
#> 7          322   975000             3.5            56
#> 230       1014  1510000             5.5           146
#> 1137      1200   679000             5.5           142
#> 1138      1200 16092000             7.0           400
#> 1139      1200  3285450             5.0           230
#> 5479      1919  1065000             4.5           130
#> 5480      1919   785000             3.5           103
#> 5481      1919  2558620             5.5           270
#> 5482      1919  1908000             6.5           210
#> 7622      2500  1100000             5.0           154
#> 7623      2500   872500             4.5           144
#> 7624      2500   420000             4.5           115
#> 7625      2500  1450000             5.5           198
#> 7626      2500   885500             5.5           130
#> 7627      2500   872500             4.5           138
#> 7628      2500   892500             4.5           144
#> 7629      2500   885500             5.5           130
#> 7630      2500   877500             4.5           138
#> 7631      2500   870500             4.5           125
#> 7632      2500   887500             5.5           130
#> 7633      2500   887500             4.5           144
#> 7634      2500  1050000             4.5           121
#> 8326      3000   820000             5.5           165
#> 8327      3000  1140000             3.5           115
#> 8328      3000  1090000             3.5           115
#> 8329      3000   920000             4.5           157
#> 8330      3000  1090000             5.5           193
#> 8331      3000  1090000             5.5           193
#> 8332      3000   920000             4.5           157
#> 8333      3000   720000             3.5           102
#> 8334      3000  1590000             5.5           330
#> 10435     4000   180000             3.0            70
#> 10436     4000   975000             4.5           125
#> 10437     4000  2100000             6.5           360
#> 12360     5201   725000             3.5            95
#> 13213     6000   695000             4.5           133
#> 13966     6511   440000             2.0            64
#> 14242     6547 15000000             7.5           220
#> 14560     6602  2800000             6.5           250
#> 14561     6602  2800000             7.5           242
#> 14562     6602   270000             1.5            28
#> 14563     6602   450000             3.5            75
#> 14564     6604  1990000             4.5           220
#> 14565     6604  2668590             5.5           290
#> 14566     6604   760000             3.5            78
#> 16579     6901  3660930             4.5           290
#> 16580     6901  3660930             4.5           290
#> 16581     6903   790000             3.5           105
#> 16582     6907   995000             4.5           114
#> 16583     6907   995000             4.5           114
#> 16584     6911   469350             5.5           140
#> 16585     6911   737550             4.5            82
#> 16586     6911   660000             7.5           200
#> 16587     6911   610000             3.5           103
#> 17896     7133  2266290             5.5           160
#> 17905     7135  2690000             8.5           236
#> 18165     8000  2100000             4.5           152
#> 18166     8000  1650000             4.5           142
#> 18167     8000   925000             3.5           102
#> 18168     8000  1650000             4.5           142
#> 18169     8000  1150000             4.5           128
#> 18170     8000  1450000             5.5           143
#> 18171     8000  1990000             5.5           200
#> 18172     8000  1990000             5.5           200
#> 18173     8000   975000             4.5           122
#> 18174     8000  2495000             5.5           482
#> 18654     8238   245000             2.0            49
#> 19078     8423  2110000             6.5           204
#> 19079     8423  2190000             5.5           167
#> 20292     9241   545000             4.5           100
#> 20293     9241   730840             5.5           130
#>                                                  address
#> 1                                       1000 Lausanne 25
#> 2                                       1000 Lausanne 25
#> 3                                       1000 Lausanne 26
#> 4                          Lausanne 26, 1000 Lausanne 26
#> 5                    Via Cuolm Liung 30d, 7032 Laax GR 2
#> 6                                         7032 Laax GR 2
#> 7                       Via Murschetg 29, 7032 Laax GR 2
#> 230                                        1014 Lausanne
#> 1137  Chemin des pralets, 74100 Etrembières, 1200 Genève
#> 1138                                         1200 Genève
#> 1139                                         1200 Genève
#> 5479                                       1919 Martigny
#> 5480                                       1919 Martigny
#> 5481                                       1919 Martigny
#> 5482                                       1919 Martigny
#> 7622                                    2500 Biel/Bienne
#> 7623                                    2500 Biel/Bienne
#> 7624                                    2500 Biel/Bienne
#> 7625                                         2500 Bienne
#> 7626                                    2500 Biel/Bienne
#> 7627                                    2500 Biel/Bienne
#> 7628                                    2500 Biel/Bienne
#> 7629                                    2500 Biel/Bienne
#> 7630                                    2500 Biel/Bienne
#> 7631                                    2500 Biel/Bienne
#> 7632                                    2500 Biel/Bienne
#> 7633                                    2500 Biel/Bienne
#> 7634                     Hohlenweg 11b, 2500 Biel/Bienne
#> 8326                                           3000 Bern
#> 8327                                           3000 Bern
#> 8328                                           3000 Bern
#> 8329                                           3000 Bern
#> 8330                                           3000 Bern
#> 8331                                           3000 Bern
#> 8332                                           3000 Bern
#> 8333                                           3000 Bern
#> 8334                                           3000 Bern
#> 10435           Lörrach Brombach Steinsack 6, 4000 Basel
#> 10436                                         4000 Basel
#> 10437                                         4000 Basel
#> 12360                                      5201 Brugg AG
#> 13213   in TRIENGEN, ca. 20 min. bei Luzern, 6000 Luzern
#> 13966                                     6511 Cadenazzo
#> 14242                               Augio 1F, 6547 Augio
#> 14560                                       6602 Muralto
#> 14561                                       6602 Muralto
#> 14562                                       6602 Muralto
#> 14563                      Via Bacilieri 2, 6602 Muralto
#> 14564                                       6604 Solduno
#> 14565                                       6604 Solduno
#> 14566                                       6604 Locarno
#> 16579                                        6901 Lugano
#> 16580                                        6901 Lugano
#> 16581                                        6903 Lugano
#> 16582                                      6907 MASSAGNO
#> 16583                                      6907 MASSAGNO
#> 16584                             6911 Campione d'Italia
#> 16585                             6911 Campione d'Italia
#> 16586                             6911 Campione d'Italia
#> 16587                             6911 Campione d'Italia
#> 17896                  Inder Platenga 34, 7133 Obersaxen
#> 17905                                       7135 Fideris
#> 18165                                        8000 Zürich
#> 18166                                        8000 Zürich
#> 18167                                        8000 Zürich
#> 18168                                        8000 Zürich
#> 18169                                        8000 Zürich
#> 18170                                        8000 Zürich
#> 18171                                        8000 Zürich
#> 18172                                        8000 Zürich
#> 18173                                        8000 Zürich
#> 18174                                        8000 Zürich
#> 18654      Stemmerstrasse 14, 8238 Büsingen am Hochrhein
#> 19078                      Chüngstrasse 60, 8423 Embrach
#> 19079                      Chüngstrasse 48, 8423 Embrach
#> 20292                                       9241 Kradolf
#> 20293                                       9241 Kradolf
#>             canton    property_type floor year_category Community
#> 1             Vaud     Single house           1919-1945      <NA>
#> 2             Vaud            Villa           2006-2010      <NA>
#> 3             Vaud        Apartment noteg     2016-2024      <NA>
#> 4             Vaud            Villa           1961-1970      <NA>
#> 5          Grisons        Apartment    eg     2016-2024      <NA>
#> 6          Grisons        Apartment noteg     2016-2024      <NA>
#> 7          Grisons        Apartment noteg     2011-2015      <NA>
#> 230           Vaud        Apartment    eg     2011-2015      <NA>
#> 1137        Geneva Bifamiliar house           2016-2024      <NA>
#> 1138        Geneva     Single house           2011-2015      <NA>
#> 1139        Geneva Bifamiliar house           1981-1990      <NA>
#> 5479        Valais        Apartment noteg     2016-2024      <NA>
#> 5480        Valais        Apartment noteg     2016-2024      <NA>
#> 5481        Valais       Attic flat noteg     2016-2024      <NA>
#> 5482        Valais        Apartment noteg     2016-2024      <NA>
#> 7622          Bern     Single house           2001-2005      <NA>
#> 7623          Bern            Villa           2016-2024      <NA>
#> 7624          Bern        Apartment noteg     1971-1980      <NA>
#> 7625          Bern     Single house           2016-2024      <NA>
#> 7626          Bern            Villa           2016-2024      <NA>
#> 7627          Bern     Single house           2016-2024      <NA>
#> 7628          Bern     Single house           2016-2024      <NA>
#> 7629          Bern     Single house           2016-2024      <NA>
#> 7630          Bern     Single house           2016-2024      <NA>
#> 7631          Bern     Single house           2016-2024      <NA>
#> 7632          Bern     Single house           2016-2024      <NA>
#> 7633          Bern     Single house           2016-2024      <NA>
#> 7634          Bern     Single house           2001-2005      <NA>
#> 8326          Bern        Apartment noteg     2016-2024      <NA>
#> 8327          Bern        Apartment    eg     2016-2024      <NA>
#> 8328          Bern        Apartment    eg     2016-2024      <NA>
#> 8329          Bern        Apartment noteg     2016-2024      <NA>
#> 8330          Bern        Roof flat noteg     2016-2024      <NA>
#> 8331          Bern        Apartment noteg     2016-2024      <NA>
#> 8332          Bern           Duplex noteg     2016-2024      <NA>
#> 8333          Bern        Apartment    eg     2016-2024      <NA>
#> 8334          Bern        Apartment noteg     1991-2000      <NA>
#> 10435  Basel-Stadt     Single house           1961-1970      <NA>
#> 10436  Basel-Stadt     Single house           2016-2024      <NA>
#> 10437  Basel-Stadt            Villa           2016-2024      <NA>
#> 12360       Aargau        Apartment noteg     2016-2024      <NA>
#> 13213      Lucerne        Apartment noteg     1991-2000      <NA>
#> 13966       Ticino        Apartment noteg     2016-2024      <NA>
#> 14242      Grisons     Single house           2016-2024      <NA>
#> 14560       Ticino     Single house           1981-1990      <NA>
#> 14561       Ticino     Single house           1981-1990      <NA>
#> 14562       Ticino        Apartment    eg     1961-1970      <NA>
#> 14563       Ticino        Apartment noteg     1946-1960      <NA>
#> 14564       Ticino       Attic flat noteg     2011-2015      <NA>
#> 14565       Ticino        Apartment noteg     2011-2015      <NA>
#> 14566       Ticino        Apartment noteg     2011-2015      <NA>
#> 16579       Ticino       Attic flat noteg     2011-2015      <NA>
#> 16580       Ticino        Apartment noteg     2011-2015      <NA>
#> 16581       Ticino        Apartment noteg     2006-2010      <NA>
#> 16582       Ticino        Apartment noteg     2016-2024      <NA>
#> 16583       Ticino        Apartment noteg     2016-2024      <NA>
#> 16584       Ticino        Apartment noteg     1946-1960      <NA>
#> 16585       Ticino        Apartment noteg     1991-2000      <NA>
#> 16586       Ticino     Single house           1971-1980      <NA>
#> 16587       Ticino        Apartment    eg     1946-1960      <NA>
#> 17896      Grisons     Single house           2006-2010      <NA>
#> 17905      Grisons     Single house              0-1919      <NA>
#> 18165       Zurich        Apartment noteg     2016-2024      <NA>
#> 18166       Zurich       Attic flat noteg     2016-2024      <NA>
#> 18167       Zurich        Apartment noteg     2016-2024      <NA>
#> 18168       Zurich        Apartment noteg     2016-2024      <NA>
#> 18169       Zurich        Apartment noteg     2016-2024      <NA>
#> 18170       Zurich        Apartment    eg     2016-2024      <NA>
#> 18171       Zurich       Attic flat noteg     2006-2010      <NA>
#> 18172       Zurich        Apartment noteg     2006-2010      <NA>
#> 18173       Zurich     Single house           2016-2024      <NA>
#> 18174       Zurich        Apartment noteg        0-1919      <NA>
#> 18654 Schaffhausen        Apartment noteg     1961-1970      <NA>
#> 19078       Zurich Bifamiliar house           2016-2024      <NA>
#> 19079       Zurich     Single house           2016-2024      <NA>
#> 20292      Thurgau        Apartment noteg     1991-2000      <NA>
#> 20293      Thurgau        Apartment noteg     1991-2000      <NA>
#>       Canton_code lon lat
#> 1            <NA>  NA  NA
#> 2            <NA>  NA  NA
#> 3            <NA>  NA  NA
#> 4            <NA>  NA  NA
#> 5            <NA>  NA  NA
#> 6            <NA>  NA  NA
#> 7            <NA>  NA  NA
#> 230          <NA>  NA  NA
#> 1137         <NA>  NA  NA
#> 1138         <NA>  NA  NA
#> 1139         <NA>  NA  NA
#> 5479         <NA>  NA  NA
#> 5480         <NA>  NA  NA
#> 5481         <NA>  NA  NA
#> 5482         <NA>  NA  NA
#> 7622         <NA>  NA  NA
#> 7623         <NA>  NA  NA
#> 7624         <NA>  NA  NA
#> 7625         <NA>  NA  NA
#> 7626         <NA>  NA  NA
#> 7627         <NA>  NA  NA
#> 7628         <NA>  NA  NA
#> 7629         <NA>  NA  NA
#> 7630         <NA>  NA  NA
#> 7631         <NA>  NA  NA
#> 7632         <NA>  NA  NA
#> 7633         <NA>  NA  NA
#> 7634         <NA>  NA  NA
#> 8326         <NA>  NA  NA
#> 8327         <NA>  NA  NA
#> 8328         <NA>  NA  NA
#> 8329         <NA>  NA  NA
#> 8330         <NA>  NA  NA
#> 8331         <NA>  NA  NA
#> 8332         <NA>  NA  NA
#> 8333         <NA>  NA  NA
#> 8334         <NA>  NA  NA
#> 10435        <NA>  NA  NA
#> 10436        <NA>  NA  NA
#> 10437        <NA>  NA  NA
#> 12360        <NA>  NA  NA
#> 13213        <NA>  NA  NA
#> 13966        <NA>  NA  NA
#> 14242        <NA>  NA  NA
#> 14560        <NA>  NA  NA
#> 14561        <NA>  NA  NA
#> 14562        <NA>  NA  NA
#> 14563        <NA>  NA  NA
#> 14564        <NA>  NA  NA
#> 14565        <NA>  NA  NA
#> 14566        <NA>  NA  NA
#> 16579        <NA>  NA  NA
#> 16580        <NA>  NA  NA
#> 16581        <NA>  NA  NA
#> 16582        <NA>  NA  NA
#> 16583        <NA>  NA  NA
#> 16584        <NA>  NA  NA
#> 16585        <NA>  NA  NA
#> 16586        <NA>  NA  NA
#> 16587        <NA>  NA  NA
#> 17896        <NA>  NA  NA
#> 17905        <NA>  NA  NA
#> 18165        <NA>  NA  NA
#> 18166        <NA>  NA  NA
#> 18167        <NA>  NA  NA
#> 18168        <NA>  NA  NA
#> 18169        <NA>  NA  NA
#> 18170        <NA>  NA  NA
#> 18171        <NA>  NA  NA
#> 18172        <NA>  NA  NA
#> 18173        <NA>  NA  NA
#> 18174        <NA>  NA  NA
#> 18654        <NA>  NA  NA
#> 19078        <NA>  NA  NA
#> 19079        <NA>  NA  NA
#> 20292        <NA>  NA  NA
#> 20293        <NA>  NA  NA

We have 77 NAN, where

  • The zip code was not found in the atmo df
  • The zip code was incorectly isolated from the address

Removed them ::: {.cell layout-align=“center”}

Code
#remove the rows with nan in city
properties_filtered <- df[!is.na(df$Community),]
reactable(head(properties_filtered, 100))

:::

2.1.4 Tax data

  • source https://swisstaxcalculator.estv.admin.ch/#/taxdata/tax-rates
  • ajouter description
  • expliquer blabla

2.1.4.1 Cleaning

Code
# read csv
impots <- read.csv(file.path(here(),"data/estv_income_rates.csv"), sep = ",", header = TRUE, stringsAsFactors = FALSE)

# Remove 1st row
impots <- impots[-1, ]
# Remove 3rd column
impots <- impots[, -3]

# Combine text for columns 4-8
impots[1, 4:8] <- "Impôt sur le revenu"
# Combine text for columns 9-13
impots[1, 9:13] <- "Impôt sur la fortune"
# Combine text for columns 14-16
impots[1, 14:16] <- "Impôt sur le bénéfice"
# Combine text for columns 17-19
impots[1, 17:19] <- "Impôt sur le capital"

# Combine content of the first 2 rows into the 2nd row
impots[2, ] <- apply(impots[1:2, ], 2, function(x) paste(ifelse(is.na(x[1]), x[2], ifelse(is.na(x[2]), x[1], paste(x[1], x[2], sep = " ")))))

# Remove 1st row
impots <- impots[-1, ]

# Assign the text to the 1st row and 1st column
impots[1, 1] <- "Coefficient d'impôt en %"
# Replace column names with the content of the first row
colnames(impots) <- impots[1, ]
impots <- impots[-1, ]

# Check for missing values in impots
any_missing <- any(is.na(impots))

if (any_missing) {
  print("There are missing values in impots.")
} else {
  print("There are no missing values in impots.")
}
#> [1] "There are no missing values in impots."


# Replace row names with the content of the 3rd column
row.names(impots) <- impots[, 3]
impots <- impots[, -3]

# Remove 2nd column (to avoid canton column)
impots <- impots[, -2]
# Remove impot eglise
impots <- impots[, -c(4:6)]
impots <- impots[, -c(6:8)]
impots <- impots[, -8]
impots <- impots[, -10]
# Clean data and convert to numeric
cleaned_impots <- apply(impots, 2, function(x) as.numeric(gsub("[^0-9.-]", "", x)))

# Replace NA values with 0
cleaned_impots[is.na(cleaned_impots)] <- 0

# Check for non-numeric values
non_numeric <- sum(!is.na(cleaned_impots) & !is.numeric(cleaned_impots))
if (non_numeric > 0) {
  print(paste("Warning: Found", non_numeric, "non-numeric values."))
}

rownames(cleaned_impots) <- rownames(impots)

#reactable(head(cleaned_impots, 100))

2.1.5 Commune Data

2.1.5.1 Cleaning

  • ajouter source
  • ajouter description
  • expliquer blabla

Replaces NAs in both Taux de couverture social and Political (Conseil National Datas) For Taux de couverture Social: NAs were due to reason “Q” = “Not indicated to protect confidentiality” We replaced the NAs by the average taux de couverture in Switzerland in 2019, which was 3.2%

For Political data: NAs were due to reason “M” = “Not indicated because data was not important or applicable” Therefore, we replaced the NAs by 0

Code
# il faudra changer le path
commune_prep <- read.csv(file.path(here(),"data/commune_data.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)

# We keep only 2019 to have some reference? (2020 is apparently not really complete)
commune_2019 <- subset(commune_prep, PERIOD_REF == "2019") %>%
  select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE", "STATUS"))

# delete les lignes ou Status = Q ou M (pas de valeur) et ensuite on enlève la colonne
commune_2019 <- subset(commune_2019, STATUS == "A") %>%
  select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE"))

# on enlève les lignes qui sont des aggrégats
commune_2019 <- subset(commune_2019, REGION != "Schweiz")

commune_2019 <- commune_2019 %>%
  pivot_wider(names_from = INDICATORS, values_from = VALUE)

# Rename columns using the provided map
commune <- commune_2019 %>%
  rename(`Population - Habitants` = Ind_01_01,
         `Population - Densité de la population` = Ind_01_03,
         `Population - Etrangers` = Ind_01_08,
         `Population - Part du groupe d'âge 0-19 ans` = Ind_01_04,
         `Population - Part du groupe d'âge 20-64 ans` = Ind_01_05,
         `Population - Part du groupe d'âge 65+ ans` = Ind_01_06,
         `Population - Taux brut de nuptialité` = Ind_01_09,
         `Population - Taux brut de divortialité` = Ind_01_10,
         `Population - Taux brut de natalité` = Ind_01_11,
         `Population - Taux brut de mortalité` = Ind_01_12,
         `Population - Ménages privés` = Ind_01_13,
         `Population - Taille moyenne des ménages` = Ind_01_14,
         `Sécurité sociale - Taux d'aide sociale` = Ind_11_01,
         `Conseil national - PLR` = Ind_14_01,
         `Conseil national - PDC` = Ind_14_02,
         `Conseil national - PS` = Ind_14_03,
         `Conseil national - UDC` = Ind_14_04,
         `Conseil national - PEV/PCS` = Ind_14_05,
         `Conseil national - PVL` = Ind_14_06,
         `Conseil national - PBD` = Ind_14_07,
         `Conseil national - PST/Sol.` = Ind_14_08,
         `Conseil national - PES` = Ind_14_09,
         `Conseil national - Petits partis de droite` = Ind_14_10)

# If no one voted for a party, set as NA -> replacing it with 0 instead
commune <- commune %>%
  mutate_at(vars(starts_with("Conseil national")), ~replace_na(., 0))


# Removing NAs from Taux de couverture sociale column
# Setting the mean as the mean for Switzerland in 2019 (3.2%)
mean_taux_aide_social <- 3.2

# Replace NA values with the mean
commune <- commune %>%
  mutate(`Sécurité sociale - Taux d'aide sociale` = if_else(is.na(`Sécurité sociale - Taux d'aide sociale`), mean_taux_aide_social, `Sécurité sociale - Taux d'aide sociale`))
#show 100 first rows of commune using reactable
reactable(head(commune, 100))
Code

# commune_prep <- read.csv(file.path(here(),"data/commune_data.csv"), sep = ";", header = TRUE, stringsAsFactors = FALSE)
# 
# # We keep only 2019 to have some reference? (2020 is apparently not really complete)
# commune_2019 <- subset(commune_prep, PERIOD_REF == "2019") %>%
#   select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE", "STATUS"))
# 
# # delete les lignes ou Status = Q ou M (pas de valeur) et ensuite on enlève la colonne
# commune_2019 <- subset(commune_2019, STATUS == "A") %>%
#   select(c("REGION", "CODE_REGION", "INDICATORS", "VALUE"))
# 
# # on enlève les lignes qui sont des aggrégats
# commune_2019 <- subset(commune_2019, REGION != "Schweiz")
# 
# commune_2019 <- commune_2019 %>%
#   pivot_wider(names_from = INDICATORS, values_from = VALUE)
# 
# # Rename columns using the provided map
# commune <- commune_2019 %>%
#   rename(`Population - Habitants` = Ind_01_01,
#          `Population - Densité de la population` = Ind_01_03,
#          `Population - Etrangers` = Ind_01_08,
#          `Population - Part du groupe d'âge 0-19 ans` = Ind_01_04,
#          `Population - Part du groupe d'âge 20-64 ans` = Ind_01_05,
#          `Population - Part du groupe d'âge 65+ ans` = Ind_01_06,
#          `Population - Taux brut de nuptialité` = Ind_01_09,
#          `Population - Taux brut de divortialité` = Ind_01_10,
#          `Population - Taux brut de natalité` = Ind_01_11,
#          `Population - Taux brut de mortalité` = Ind_01_12,
#          `Population - Ménages privés` = Ind_01_13,
#          `Population - Taille moyenne des ménages` = Ind_01_14,
#          `Sécurité sociale - Taux d'aide sociale` = Ind_11_01,
#          `Conseil national - PLR` = Ind_14_01,
#          `Conseil national - PDC` = Ind_14_02,
#          `Conseil national - PS` = Ind_14_03,
#          `Conseil national - UDC` = Ind_14_04,
#          `Conseil national - PEV/PCS` = Ind_14_05,
#          `Conseil national - PVL` = Ind_14_06,
#          `Conseil national - PBD` = Ind_14_07,
#          `Conseil national - PST/Sol.` = Ind_14_08,
#          `Conseil national - PES` = Ind_14_09,
#          `Conseil national - Petits partis de droite` = Ind_14_10)
# 
# # If no one voted for a party, set as NA -> replacing it with 0 instead
# commune <- commune %>%
#   mutate_at(vars(starts_with("Conseil national")), ~replace_na(., 0))
# 
# 
# # Removing NAs from Taux de couverture sociale column
# # Setting the mean as the mean for Switzerland in 2019 (3.2%)
# mean_taux_aide_social <- 3.2
# 
# # Replace NA values with the mean
# commune <- commune %>%
#   mutate(`Sécurité sociale - Taux d'aide sociale` = if_else(is.na(`Sécurité sociale - Taux d'aide sociale`), mean_taux_aide_social, `Sécurité sociale - Taux d'aide sociale`))
# 

3 Unsupervised learning

  • Clustering and/or dimension reduction

Trying to Cluster commune datas to: 1. Reduce dimension 2. See similarities

A regarder, est-ce qu’on fait un cluster pour les datas politques + un cluster pour les data démographiques, ou est-ce qu’on regroupe tout?

Code
set.seed(123)

# Clustering demographic
cols_commune_demographic <- select(commune, -c("REGION", "CODE_REGION","Conseil national - PLR","Conseil national - PDC", "Conseil national - PS", "Conseil national - UDC", "Conseil national - PEV/PCS", "Conseil national - PVL", "Conseil national - PBD", "Conseil national - PST/Sol.", "Conseil national - PES", "Conseil national - Petits partis de droite"))

# Scale the columns, some are total numbers, some are percentages
cols_commune_demographic <- scale(cols_commune_demographic)

# Calculate the distance matrix
dist_matrix_demographic <- dist(cols_commune_demographic, method = "minkowski")

# Perform hierarchical clustering
hclust_model_demographic <- hclust(dist_matrix_demographic, method = "ward.D")

# Create dendrogram
dend_demo <- as.dendrogram(hclust_model_demographic)
dend_demo <- color_branches(dend_demo, k = 5) #Set number of cluster to 5, to keep the same scale for all our variables

plot(dend_demo, main = "Demographics - Hierarchical Clustering Dendrogram")

Code
# Clustering politics
set.seed(123)

cols_commune_politics <- select(commune, c("Conseil national - PLR","Conseil national - PDC", "Conseil national - PS", "Conseil national - UDC", "Conseil national - PEV/PCS", "Conseil national - PVL", "Conseil national - PBD", "Conseil national - PST/Sol.", "Conseil national - PES", "Conseil national - Petits partis de droite"))

# Scale the columns, some are total numbers, some are percentages
cols_commune_politics <- scale(cols_commune_politics)

# Calculate the distance matrix
dist_matrix_politics <- dist(cols_commune_politics, method = "minkowski")

# Perform hierarchical clustering
hclust_model_politics <- hclust(dist_matrix_politics, method = "ward.D")

# Create dendrogram
dend_pol <- as.dendrogram(hclust_model_politics)
dend_pol <- color_branches(dend_pol, k = 5) #Set number of cluster to 5, to keep the same scale for all our variables

plot(dend_pol, main = "Politics - Hierarchical Clustering Dendrogram")

To prevent introducing 10 new types of taxes, we conducted a clustering analysis on the tax dataset to identify which municipalities can be grouped together. Based on the within-cluster sum of squares, we found 5 clusters. These 5 distinct clusters will be assigned to properties to determine which municipalities are subject to a particular type of tax. ## Tax ::: {.cell layout-align=“center”}

Code
set.seed(123)

# Clean data and convert to numeric
cleaned_impots <- apply(impots, 2, function(x) as.numeric(gsub("[^0-9.-]", "", x)))
cleaned_impots[is.na(cleaned_impots)] <- 0  # Replace NA values with 0

# Scale the features
scaled_impots <- scale(cleaned_impots)

# Perform k-means clustering
k <- 2  # Initial guess for the number of clusters
kmeans_model <- kmeans(scaled_impots, centers = k)

# Check within-cluster sum of squares (elbow method)
wss <- numeric(10)
for (i in 1:10) {
  kmeans_model <- kmeans(scaled_impots, centers = i)
  wss[i] <- sum(kmeans_model$withinss)
}
#plot(1:10, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")

# Adjust k based on elbow method
k <- 5  

# Perform k-means clustering again with optimal k
kmeans_model <- kmeans(scaled_impots, centers = k)

# Assign cluster labels to dendrogram
clusters <- kmeans_model$cluster

# Plot dendrogram
#colored_dend <- color_branches(dend, k = 5)
#y_zoom_range <- c(0, 80)  # Adjust the y-axis range as needed

#plot(colored_dend, main = "Hierarchical Clustering Dendrogram", horiz = FALSE, ylim = y_zoom_range)

:::

Code
# Get the cluster centers
cluster_centers <- kmeans_model$centers

# Create a data frame with cluster centers
cluster_centers_df <- data.frame(cluster = 1:k, cluster_centers)

# Print cluster centers
print(cluster_centers_df)
#>   cluster Coefficient.d.impôt.en.. Impôt.sur.le.revenu.Canton
#> 1       1                    0.400                     -0.611
#> 2       2                    0.786                     -0.390
#> 3       3                   -0.318                     -0.266
#> 4       4                   -0.792                     -0.699
#> 5       5                   -0.941                      1.932
#>   Impôt.sur.le.revenu.Commune Impôt.sur.la.fortune.Canton
#> 1                     -0.0839                      -0.615
#> 2                     -0.5732                      -0.394
#> 3                      0.9387                      -0.270
#> 4                     -0.3119                      -0.690
#> 5                      1.3332                       1.933
#>   Impôt.sur.la.fortune.Commune Impôt.sur.le.bénéfice.Canton
#> 1                      -0.0849                        1.951
#> 2                      -0.5725                       -0.249
#> 3                       0.9383                       -0.869
#> 4                      -0.3128                       -0.709
#> 5                       1.3330                        1.402
#>   Impôt.sur.le.bénéfice.Commune Impôt.sur.le.capital.Canton
#> 1                       -1.3419                       1.879
#> 2                       -0.6444                      -0.276
#> 3                        1.9896                      -0.874
#> 4                        0.0565                      -0.718
#> 5                        0.8732                       1.492
#>   Impôt.sur.le.capital.Commune
#> 1                     -1.30508
#> 2                     -0.65169
#> 3                      1.81595
#> 4                      0.00514
#> 5                      1.01512

# Calculate the size of each cluster
cluster_sizes <- table(kmeans_model$cluster)

# Print cluster sizes
print(cluster_sizes)
#> 
#>   1   2   3   4   5 
#>  75 999 178 462 417

# Get the cluster labels
cluster_labels <- kmeans_model$cluster

# Convert cleaned_impots to a data frame
impots_cluster <- as.data.frame(cleaned_impots)

# Add the cluster labels to cleaned_impots
impots_cluster$cluster <- cluster_labels

rownames(impots_cluster) <- rownames(impots)

impots_cluster <- impots_cluster %>%
  rownames_to_column(var = "Community")
Code
# Preparing df_commune for merging with main dataset

df_commune <- select(commune, REGION)

df_commune$Demographic_cluster <- cutree(hclust_model_demographic, k = 5)
df_commune$Political_cluster <- cutree(hclust_model_politics, k = 5)

# Preparing to merge

merging <- inner_join(amto_df, df_commune, by = c("Community" = "REGION"))

impots_cluster_subset <- impots_cluster[, c("Community", "cluster")]
merging <- merging %>%
  left_join(impots_cluster_subset, by = "Community")

clusters_df <- merging %>%
  rename(Tax_cluster = cluster) %>%
  rename(Commune = Community)

clusters_df <- clusters_df %>%
  select(c("Commune", "zip_code", "Canton_code", "Demographic_cluster", "Political_cluster", "Tax_cluster"))

# Only NAs are for commune Brugg, (written Brugg (AG) in the other data set) -> j'entre le cluster à la mano
clusters_df$Tax_cluster[is.na(clusters_df$Tax_cluster)] <- 2

# adding it to our main data set:
properties_filtered <- merge(properties_filtered, clusters_df[, c("zip_code", "Demographic_cluster", "Political_cluster", "Tax_cluster")], by = "zip_code", all.x = TRUE)
Code
# Dropping 228 rows containing NAs after the merge

na_count <- sum(is.na(properties_filtered[, c("Demographic_cluster", "Political_cluster", "Tax_cluster")]))

# Print the result
if (na_count > 0) {
  print("There are NA values in the merged dataframe.")
  print(na_count)
} else {
  print("There are no NA values in the merged dataframe.")
}
#> [1] "There are NA values in the merged dataframe."
#> [1] 675

# Find rows with NA values in the specified columns
na_rows <- subset(properties_filtered, is.na(Demographic_cluster) | is.na(Political_cluster) | is.na(Tax_cluster))

# Drop the NA rows
properties_filtered <- anti_join(properties_filtered, na_rows, by = "zip_code")

4 EDA

4.1 Map representation of distribution of properties

Code
# Create a leaflet map with optimized markers
map <- leaflet(properties_filtered) %>%
  addTiles() %>%  # Add default OpenStreetMap tiles
  addProviderTiles(providers$Esri.NatGeoWorldMap) %>% # Add topographic maps for context
  addCircleMarkers(
    ~lon, ~lat,
    radius = 1.5,  # Smaller radius for the circle markers
    color = "#32012F",  # Specifying a color for the markers
    fillOpacity = 0.2,  # Semi-transparent fill
    stroke = FALSE,  # No border to the circle markers to reduce visual noise
    popup = ~paste("Price: ", price, "<br>",
                   "Rooms: ", number_of_rooms, "<br>",
                   "Type: ", property_type, "<br>",
                   "Year: ", year_category),
    label = ~paste("Price: ", price)  # Tooltip on hover
  ) %>% addLegend(
    position = "bottomright",  # Position the legend at the bottom right
    colors = "#32012F",  # Use the same color as the markers
    labels = "Properties"  # Label for the legend
  )

map$width <- "100%"  # Set the width of the map to 100%
map$height <- 600  # Set the height of the map to 600 pixels

map

4.2 Histogram of prices

Code
histogram_price <- ggplot(properties_filtered, aes(x = price)) +
  geom_histogram(binwidth = 100000, fill = "skyblue", color = "red") +
  labs(title = "Distribution of Prices",
       x = "Price",
       y = "Frequency") +
  theme_minimal()
# Convert ggplot object to plotly object
interactive_histogram_price <- ggplotly(histogram_price, width = 600, height = 400 )
# Display the interactive histogram
interactive_histogram_price

4.3 Histogram of prices for each property type

note : only price between 0 and 500000 so some outliers aren’t here

Code
# Create the ggplot object
histogram <- ggplot(properties_filtered, aes(x = price)) +
  geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
  facet_wrap(~ property_type, scales = "free", ncol = 2) +
  labs(title = "Distribution of Prices by Property Type",
       x = "Price",
       y = "Frequency") +
  theme_minimal() +
  xlim(0, 5000000)

# Convert ggplot object to plotly object
interactive_histogram <- ggplotly(histogram, width = 600, height = 1000)

# Display the interactive plot
interactive_histogram

4.4 Histogram of prices for each year category

note : only price between 0 and 500000 so some outliers aren’t here

Code
# Create a histogram of prices for each year category
histogram <- ggplot(properties_filtered, aes(x = price)) +
  geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
  facet_wrap(~ year_category, scales = "free", ncol = 2) +
  labs(title = "Distribution of Prices by Year Category",
       x = "Price",
       y = "Frequency") +
  theme_minimal() +
  xlim(0, 5000000)
# Convert ggplot object to plotly object
interactive_histogram_year <- ggplotly(histogram, width = 600, height = 1000)
# Display the interactive plot
interactive_histogram_year

4.5 Histogram of prices for each canton

note : only price between 0 and 500000 so some outliers aren’t here

Code
histogram <- ggplot(properties_filtered, aes(x = price)) +
  geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
  facet_wrap(~ canton, scales = "free", ncol = 2) +
  labs(title = "Distribution of Prices by Canton for properties between 0 and 5 million",
       x = "Price",
       y = "Frequency") +
  theme_minimal() +
  xlim(0, 5000000)

# Convert ggplot object to plotly object with adjusted height
interactive_histogram <- ggplotly(histogram, width = 600, height = 1000) %>%
  layout(height = 1000)  # Adjust the height as needed

# Display the interactive plot
interactive_histogram

4.6 Histogram of prices for each number of rooms

note : only price between 0 and 500000 so some outliers aren’t here

Code
# Create a histogram of prices for each number of rooms
histogram <- ggplot(properties_filtered, aes(x = price)) +
  geom_histogram(binwidth = 100000, fill = "skyblue", color = "black") +
  facet_wrap(~ number_of_rooms, scales = "free", ncol = 2) +
  labs(title = "Distribution of Prices by Number of Rooms",
       x = "Price",
       y = "Frequency") +
  theme_minimal() +
  xlim(0, 5000000)

# Convert ggplot object to plotly object with adjusted height
interactive_histogram <- ggplotly(histogram, width = 600, height = 1000)%>%
  layout(height = 2000)

# Display the interactive plot
interactive_histogram

4.7 Histogram of properties by square meters

Code
histogram <- ggplot(properties_filtered, aes(x = square_meters)) +
  geom_histogram(binwidth = 15, fill = "skyblue", color = "black") +
  labs(title = "Distribution of Properties by Square Meters",
       x = "Square Meters",
       y = "Frequency") +
  theme_minimal() +
  xlim(0, 2000)

# Convert ggplot object to plotly object with adjusted height
interactive_histogram <- ggplotly(histogram, width = NULL, height = NULL)  # Adjust width and height as needed

# Display the interactive plot
interactive_histogram

4.8 Histogram of prices with impot

Code
# Create the boxplot
boxplot <- ggplot(properties_filtered, aes(x = as.factor(Tax_cluster), y = price)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Property Prices by Tax Cluster",
       x = "Tax Cluster",
       y = "Price") +
  theme_minimal() +
  ylim(100000, 400000)

# Convert ggplot object to plotly object
interactive_boxplot <- ggplotly(boxplot)
interactive_boxplot
Code
impot_cols <- names(properties_filtered)[startsWith(names(properties_filtered), "Impôt")]

# Count the number of NA values in selected columns
na_counts <- colSums(is.na(properties_filtered[impot_cols]))

# Print the counts
print(na_counts)
#> numeric(0)

5 Conclusion

  • Brief summary of the project
  • Take home message
  • Limitations
  • Future work?